#!/usr/bin/env bash
#
# Launch a single-GPU DeepSpeed run of src/train.py that performs LoRA
# supervised fine-tuning (--stage sft) of the Qwen2.5-14B-Instruct checkpoint.
#
# Usage:
#   bash <this-script>
#
# The script uses relative paths (src/train.py, examples/deepspeed/...), so it
# must be started from the directory that contains both `src/` and `examples/`.
# The flag set looks like LLaMA-Factory's CLI on top of Hugging Face
# TrainingArguments -- TODO confirm against the project version in use.

set -euo pipefail

# Arguments are kept in an array so each group can carry a comment (comments
# are not possible inside a backslash-continued command line).
train_args=(
  # --- Task, model and data -------------------------------------------------
  --stage sft
  # Presumably a ZeRO stage-2 config, judging by the file name -- verify.
  --deepspeed examples/deepspeed/ds_z2_config.json
  --do_train
  --model_name_or_path "/data/models/Qwen2.5-14B-Instruct"
  --dataset lottery_sft,identity
  --template qwen

  # --- LoRA adapter ---------------------------------------------------------
  --finetuning_type lora
  --lora_target q_proj,v_proj

  # --- Output and preprocessing ---------------------------------------------
  # Directory name mirrors the run settings (b64 / g16 / c8192 / z2); keep it
  # in sync by hand when the corresponding flags below change.
  --output_dir Qwen2.5-14B-sft_1_b64_g16_c8192_z2
  --preprocessing_num_workers 16

  # --- Batching -------------------------------------------------------------
  # With 1 GPU this should give 64 * 16 = 1024 sequences per optimizer step.
  # NOTE(review): a per-device batch of 64 at cutoff_len 8192 on a 14B model
  # looks memory-hungry for one GPU -- confirm it fits, or trade batch size
  # for accumulation steps.
  --per_device_train_batch_size 64
  --gradient_accumulation_steps 16
  # NOTE(review): no eval split / eval strategy flag is passed in this script,
  # so this and --eval_steps below are presumably unused -- confirm.
  --per_device_eval_batch_size 2

  # --- Schedule and logging -------------------------------------------------
  --lr_scheduler_type cosine
  --logging_steps 10
  # The original command also passed `--warmup_ratio 0.1`. Hugging Face
  # TrainingArguments documents that warmup_steps overrides warmup_ratio, so
  # the ratio was dead; it was removed to leave a single source of truth.
  # The effective warmup (20 steps) is unchanged.
  --warmup_steps 20

  # --- LoRA hyper-parameters ------------------------------------------------
  --lora_rank 64
  --lora_alpha 128
  --lora_dropout 0.05

  # --- Checkpointing, evaluation cadence, optimizer -------------------------
  --save_steps 1000
  --eval_steps 1000
  --learning_rate 5e-5
  --save_total_limit 10
  --plot_loss
  # NOTE(review): fp16 is kept as in the original. If the GPU supports
  # bfloat16, --bf16 may be the more numerically robust choice for this
  # model -- confirm before switching.
  --fp16
  --cutoff_len 8192
  --num_train_epochs 10.0
)

deepspeed --num_gpus 1 src/train.py "${train_args[@]}"